# 构造基本数据集

import pandas as pd
import numpy as np

RATING_PATH = "../../data/ml-20m/ratings.csv"  # userId,movieId,rating,timestamp
MOVIE_RATING_TRAIN_PATH = "../../data/rating_20190226_1_train.csv"
MOVIE_RATING_TEST_PATH = "../../data/rating_20190226_1_test.csv"
TRAIN_RATE = 0.7  # fraction of rows written to the training file

# Load the raw ratings.
rating = pd.read_csv(RATING_PATH)
print("%s data loaded" % len(rating))

# Shuffle the rows. DataFrame.sample returns a NEW frame rather than
# shuffling in place, so the result has to be assigned back; the fixed
# random_state keeps the split reproducible between runs.
rating = rating.sample(frac=1, random_state=10)
print("suffled...")

# Use a single split index for both slices so that every row ends up in
# exactly one of the two files (no row is dropped at the boundary).
# Integer slicing on a DataFrame is positional, so the shuffled index
# labels do not affect which rows are selected.
split_at = int(len(rating) * TRAIN_RATE)
rating[:split_at].to_csv(MOVIE_RATING_TRAIN_PATH, index=False)
rating[split_at:].to_csv(MOVIE_RATING_TEST_PATH, index=False)
print("saved...")